permits <- get_egis_table(db = "housing", table = "tbl_Permit")
2019-03-14 19:56:39: Retrieving table housing.tbl_Permit
Error in ogrInfo(dsn = dsn, layer = layer, encoding = encoding, use_iconv = use_iconv, :
Cannot open layer
First need to join up the real property data (Open Baltimore) the sales data (provided by Steve, and with deed dates from January 1, 2010 through October 2018) so we have a neighborhood for as many sales as we can.
sales <- sales %>% rename(sales.block = Block, sales.lot = Lot)
Error in .f(.x[[i]], ...) : object 'Block' not found
real.prop <- real.prop %>%
mutate(real.block.clean = gsub("^0+", "", real.block),
real.lot.clean = gsub("^0+", "", real.lot))
sales <- sales %>%
mutate(sales.block.clean = gsub("^0+", "", sales.block),
sales.lot.clean = gsub("^0+", "", sales.lot))
sales <- sales %>%
left_join(real.prop,
by = c("sales.block.clean" = "real.block.clean",
"sales.lot.clean" = "real.lot.clean")
)
sales %>% count(is.na(real.block), is.na(real.lot))
1,153 sales didn’t match to a block-lot in the real property table, which means that the block-lot jointly was not in the real prop table.
Also, there are about 16,000 properties in the real prop table that don’t have a neighborhood.
real.prop %>% count(is.na(neighborhood))
So after joining we end up with 9,428 sales that don’t have a neighborhood.
sales %>% count(!is.na(neighborhood))
The real property table also gives if it is principal residence or not, so we’ll also filter for the sales that are for principal residences.
sales %>% count(rescode)
Distribution of city-wide 2018 sales prices:
sales %>%
filter(year(deed.date) == 2018) %>%
ggplot(aes(`Sales Price`)) +
geom_histogram() +
theme_iteam_google_docs() +
xlim(c(0, 500000))
quantile(sales$`Sales Price`, 0.85)
85%
275000
permits %>% glimpse()
Observations: 836,816
Variables: 45
$ ID_Permit <int> 375642224, 375642225, 375642226, 375642227, 37...
$ csm_caseno <fct> 000000001, 000000002, 000000003, 000000004, 00...
$ csm_plan_year <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ csm_plans_number <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ csm_description <fct> issued through building inspection, install ga...
$ csm_expr_date <dttm> 1994-06-06, 1994-06-04, 1993-06-06, 1994-06-0...
$ csm_finaled_date <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ csm_issued_date <dttm> 1993-09-30, 1993-06-14, 1993-06-14, 1993-06-1...
$ csm_name_first <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ csm_name_last <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ csm_name_mi <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ csm_projname <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ csm_recd_by <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ csm_recd_date <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ csm_status <fct> EXP, EXP, EXP, EXP, EXP, EXP, EXP, EXP, EXP, E...
$ csm_frozen <fct> N, N, N, N, N, N, N, N, N, N, F, N, N, N, N, N...
$ csm_auto_cond <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ csm_updateby <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ csm_updated <dttm> 2005-08-07, 2005-08-07, 2005-08-07, 2005-08-0...
$ csm_projno <fct> 000000001, 000000002, 000000003, 000000004, 00...
$ prc_avp_no <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ csm_target_date <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
$ case_type <fct> COM, COM, COM, COM, COM, COM, COM, COM, COM, C...
$ PLANADDRESS <fct> 0000 COUNTER, 1067 CAMERON ROAD, 4266 CLYDESDA...
$ prc_parcel_no <fct> 9948 948, 5142 034, 3575C010, 5164 022, 3355 0...
$ com_type_work <fct> OTH, OTH, AA, OTH, OTH, OTH, OTH, OTH, OTH, AA...
$ com_sprinklers <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ com_existing_use <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ csm_type_work <fct> OTH, OTH, AA, OTH, OTH, OTH, OTH, OTH, OTH, AA...
$ csm_use <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ PlansNum <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ csm_st_name <fct> COUNTER, CAMERON ROAD, CLYDESDALE AVE, KENILWO...
$ csm_st_number <fct> 0000, 1067, 4266, 5313, 2518, 4217, 3527, 0720...
$ csm_st_pfx <fct> NA, NA, NA, NA, NA, NA, NA, NA, E, NA, NA, NA,...
$ prc_block_no <fct> 9948 , 5142, 3575C, 5164, 3355, 5749, 5555, 76...
$ prc_lot <fct> 948, 034, 010, 022, 010, 017, 194, 036, 048, 0...
$ prc_neighborhood <fct> NA, CAMERON VILLAGE, MEDFIELD, KENILWORTH PARK...
$ PlanURL <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ prc_hi_dist <int> NA, 570, 540, 570, 672, 480, 470, 980, 551, 62...
$ csm_cost <dbl> 0, 2000, 2290, 1400, 6600, 5800, 4500, 5350, 7...
$ csm_mastno <fct> 000000001, 000000002, 000000003, 000000004, 00...
$ BlockLot <fct> 9948 948, 5142034, 3575C010, 5164022, 3355010,...
$ csm_id <fct> 200243175721, 200243175722, 200243175723, 2002...
$ Applicant <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
$ Lessee <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
permits %>% count(csm_status)
permits %>% count(csm_type_work)
meet.criteria <- sales %>%
filter(year(deed.date) %in% c(2015, 2016, 2017),
!is.na(neighborhood),
`How Conveyed` == 1,
!grepl("NOT", rescode)) %>%
nrow
We have 16112 samples to work with that are in 2015-2017, have a neighborhood, were an arms-length sale, and are the principal residence.
sales.summary.15_17.by.hood <- sales %>%
filter(year(deed.date) %in% c(2015, 2016, 2017),
!is.na(neighborhood),
`How Conveyed` == 1,
!grepl("NOT", rescode)) %>%
group_by(neighborhood) %>%
summarise(hood.n = n(),
hood.mean = mean(`Sales Price`),
hood.median = median(`Sales Price`),
hood.std = sqrt(sum((`Sales Price`-hood.mean)^2/(hood.n-1))),
hood.95th = quantile(`Sales Price`, probs = .95),
hood.98th = quantile(`Sales Price`, probs = .98),
hood.99th = quantile(`Sales Price`, probs = .99))
sales.summary.15_17.by.hood
Which neighborhoods have less than 20 sales meeting the criteria?
sales.summary.15_17.by.hood %>%
filter(hood.n < 20)
84 neighborhoods have less than 20 sales meeting the criteria. We’ll exclude them going forward so we have a reasonable sample size.
# Join the summaries to the neighborhood boundaries
hoods@data <- hoods@data %>%
left_join(sales.summary.15_17.by.hood,
by = c("label" = "neighborhood"))
sales.hood.98th <- sales %>%
left_join(sales.summary.15_17.by.hood,
by = c("neighborhood" = "neighborhood")) %>%
filter(year(deed.date) == 2018,
hood.n >= 20,
`Sales Price` >= hood.98th,
`How Conveyed` == 1,
!grepl("NOT", rescode)) %>%
arrange(neighborhood)
result.sales <- nrow(sales.hood.98th)
There are 167 sales that meet the following criteria:
(If this yield isn’t high enough we can bump it down to the 95th percentile.)
sales.hood.98th$long <- lapply(sales.hood.98th$location.coordinates, function(x) x[1]) %>% unlist()
sales.hood.98th$lat <- lapply(sales.hood.98th$location.coordinates, function(x) x[2]) %>% unlist()
sales.hood.98th.geo <- sales.hood.98th %>% filter(!is.na(long))
sales.hood.98th.geo <- SpatialPointsDataFrame(
sales.hood.98th.geo %>% select(long, lat),
sales.hood.98th.geo,
proj4string = CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"))
sales.hood.98th.geo <-
spTransform(
sales.hood.98th.geo,
CRSobj = CRS("+init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
)
library(htmltools)
hoods.labels <- paste0(
hoods$label,
"<br>Median Sales, 2015-2017: ", as.character(hoods$hood.median)
)
sale.labels <- paste0(
sales.hood.98th.geo$`House #`, " ",
sales.hood.98th.geo$`Street Name`, " ",
sales.hood.98th.geo$Suffix,
"<br>Sale Price in 2018: ",
as.character(sales.hood.98th.geo$`Sales Price`),
"<br>New Owner: ", sales.hood.98th.geo$new.owner
)
leaflet() %>%
setView(lng = -76.6, lat = 39.3, zoom = 11) %>%
addProviderTiles(providers$Stamen.TonerLite) %>%
addPolygons(data = hoods,
weight = 2,
color = "black",
opacity = 0.5,
fillOpacity = 0,
label = ~lapply(hoods.labels, HTML)) %>%
addCircleMarkers(data = sales.hood.98th.geo,
radius = 2,
label = ~lapply(sale.labels, HTML))
sales.hood.98th
#look for temporal jumps in prices
sales.hood.99th <- sales %>%
left_join(sales.summary.15_17.by.hood,
by = c("neighborhood" = "neighborhood")) %>%
filter(year(deed.date) == 2018,
hood.n >= 20,
`Sales Price` >= hood.99th,
`How Conveyed` == 1,
!grepl("NOT", rescode)) %>%
arrange(neighborhood)
result.sales <- nrow(sales.hood.99th)
There are 167 sales that meet the following criteria:
(If this yield isn’t high enough we can bump it down to the 95th percentile.)
sales.hood.99th$long <- lapply(sales.hood.99th$location.coordinates, function(x) x[1]) %>% unlist()
sales.hood.99th$lat <- lapply(sales.hood.99th$location.coordinates, function(x) x[2]) %>% unlist()
sales.hood.99th.geo <- sales.hood.99th %>% filter(!is.na(long))
sales.hood.99th.geo <- SpatialPointsDataFrame(
sales.hood.99th.geo %>% select(long, lat),
sales.hood.99th.geo,
proj4string = CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"))
sales.hood.99th.geo <-
spTransform(
sales.hood.99th.geo,
CRSobj = CRS("+init=epsg:4326 +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
)
library(htmltools)
hoods.labels <- paste0(
hoods$label,
"<br>Median Sales, 2015-2017: ", as.character(hoods$hood.median)
)
sale.labels <- paste0(
sales.hood.99th.geo$`House #`, " ",
sales.hood.99th.geo$`Street Name`, " ",
sales.hood.99th.geo$Suffix,
"<br>Sale Price in 2018: ",
as.character(sales.hood.99th.geo$`Sales Price`),
"<br>New Owner: ", sales.hood.99th.geo$new.owner
)
leaflet() %>%
setView(lng = -76.6, lat = 39.3, zoom = 11) %>%
addProviderTiles(providers$Stamen.TonerLite) %>%
addPolygons(data = hoods,
weight = 2,
color = "black",
opacity = 0.5,
fillOpacity = 0,
label = ~lapply(hoods.labels, HTML)) %>%
addCircleMarkers(data = sales.hood.99th.geo,
radius = 2,
label = ~lapply(sale.labels, HTML))
emily.list <- c("4011 BARRINGTON",
"DORCHESTER",
"2319 MONTICELLO")
sales.hood.98th.geo@data %>%
filter(grepl(paste(emily.list, collapse="|"), propertyaddress))
The following criteria are used below:
permits.recent <- permits %>%
filter(csm_issued_date >= "2017-01-01") %>%
mutate(permit.block.clean = gsub("^0+", "", prc_block_no),
permit.lot.clean = gsub("^0+", "", prc_lot))
permits.recent.summary <- permits.recent %>%
group_by(permit.block.clean, permit.lot.clean) %>%
summarise(permit.count = n(),
permit.total.value = sum(csm_cost, na.rm = T))
mid.hoods <- hmt.hood %>% filter(`Predominant Code Ignoring Non-Residential` %in% c("D", "E", "F", "G", "H"))
sales.99th.mid.hood <- subset(sales.hood.99th.geo, tolower(neighborhood) %in% tolower(mid.hoods$Neighborhood))
sales.99th.mid.hood.over.250k <- subset(sales.99th.mid.hood,
`Sales Price` > 250000)
sales.99th.mid.hood.over.250k@data %>% nrow
[1] 45
sales.99th.mid.hood.over.250k@data <- sales.99th.mid.hood.over.250k@data %>%
left_join(permits.recent.summary,
by = c("sales.block.clean" = "permit.block.clean",
"sales.lot.clean" = "permit.lot.clean"))
Further filter for permit value totals over $10,000.
sales.99th.mid.hood.over.250k.10k.permit <- subset(sales.99th.mid.hood.over.250k, permit.total.value >= 10000)
Results in 27 properties.
sales.99th.mid.hood.over.250k.10k.permit@data %>% nrow
[1] 27
mid.hoods.geo <- subset(hoods,
tolower(label) %in% tolower(mid.hoods$Neighborhood))
mid.hoods.labels <- paste0(
mid.hoods.geo$label,
"<br>Median Sales, 2015-2017: ", as.character(mid.hoods.geo$hood.median)
)
sale.labels <- paste0(
sales.99th.mid.hood.over.250k.10k.permit$`House #`, " ",
sales.99th.mid.hood.over.250k.10k.permit$`Street Name`, " ",
sales.99th.mid.hood.over.250k.10k.permit$Suffix,
"<br>Sale Price in 2018: ",
as.character(sales.99th.mid.hood.over.250k.10k.permit$`Sales Price`),
"<br>New Owner: ", sales.99th.mid.hood.over.250k.10k.permit$new.owner,
"<br>Permits Issued from 2017-2018: ", sales.99th.mid.hood.over.250k.10k.permit$permit.count,
"<br>Total Permit Value from 2017-2018: ", sales.99th.mid.hood.over.250k.10k.permit$permit.total.value
)
leaflet() %>%
setView(lng = -76.6, lat = 39.3, zoom = 11) %>%
addProviderTiles(providers$Stamen.TonerLite) %>%
addPolygons(data = hoods,
weight = 2,
color = "black",
opacity = 0.5,
fillOpacity = 0,
label = ~lapply(hoods.labels, HTML)) %>%
addPolygons(data = mid.hoods.geo,
weight = 2,
#color = "black",
opacity = 0.0,
fillOpacity = .2,
fillColor = iteam.colors[3],
label = ~lapply(mid.hoods.labels, HTML)) %>%
addCircleMarkers(data = sales.99th.mid.hood.over.250k.10k.permit,
color = iteam.colors[1],
opacity = 1,
radius = 2,
label = ~lapply(sale.labels, HTML))
sales.99th.mid.hood.over.250k.10k.permit@data